Zircon - Fuchsia 内核分析 - 启动（内核初始化） | 自由微信

查看原文

其他

Zircon - Fuchsia 内核分析 - 启动（内核初始化）

坑大看雪学院 2019-05-25

简介

前面已经介绍了 Zircon 内核启动的汇编代码部分，主要是一些 CPU 的初始化。

现在 prime CPU 已经来到了 C 世界的 lk_main() 函数。

其他 CPU 也来到了 arm64_secondary_entry()函数。

lk_main

lk_main() 是打开 Zircon 内核世界的大门，由 prime cpu 敲开，一眼望去你就会发现 lk_main 一行行代码目的都非常明确：

硬件环境初始化遵循 CPU 架构 -> 目标平台 -> 目标设备 的初始化顺序

线程早期初始化
调用全局构造函数
CPU 架构早期初始化
平台早期初始化
目标设备早期初始化(仅是 Hook)
虚拟内存预初始化
内核堆初始化
虚拟内存初始化
内核初始化

多核心初始化
计时器队列初始化

第二阶段初始化(由创建出的 bootstrap2 线程完成)

CPU 架构后期初始化
平台后期初始化
目标设备初始化(仅是 Hook)

创建并启动 bootstrap2 线程之后，当前的启动线程通过 thread_become_idle() 成为 prime CPU 的 IDLE 线程，剩余的初始化工作由 bootstrap2 线程完成。

每当 prime CPU 进行一个阶段的初始化时，都会通过 lk_primary_cpu_init_level() 函数通知其他 CPU 当前初始化的进度

lk_main 代码：

// Kernel initialization entry point.
// called from arch code
//
// Runs the early init stages in a fixed order. Between stages it calls
// lk_primary_cpu_init_level() with the range of init levels that has just
// become reachable. The tail of the function hands the remaining work to a
// new "bootstrap2" thread and turns the calling thread into the idle thread.
void lk_main() {
    // serial prints to console based on compile time switch
    dlog_bypass_init_early();

    // get us into some sort of thread context
    // (early thread initialization)
    thread_init_early();

    // deal with any static constructors
    // Global constructors are the functions marked
    // __attribute__((constructor)).
    call_constructors();

    // early arch stuff
    lk_primary_cpu_init_level(LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_ARCH_EARLY - 1);

    // Early CPU-architecture initialization.
    arch_early_init();

    // do any super early platform initialization
    lk_primary_cpu_init_level(LK_INIT_LEVEL_ARCH_EARLY, LK_INIT_LEVEL_PLATFORM_EARLY - 1);

    // Early platform initialization.
    platform_early_init();

    // do any super early target initialization
    lk_primary_cpu_init_level(LK_INIT_LEVEL_PLATFORM_EARLY, LK_INIT_LEVEL_TARGET_EARLY - 1);

    // Early target-device initialization.
    // NOTE(review): the original author's note says this hook is not actually
    // implemented yet -- confirm against the target code.
    target_early_init();

    dprintf(INFO, "\nwelcome to Zircon\n\n");

    dprintf(INFO, "KASLR: .text section at %p\n", __code_start);

    lk_primary_cpu_init_level(LK_INIT_LEVEL_TARGET_EARLY, LK_INIT_LEVEL_VM_PREHEAP - 1);
    dprintf(SPEW, "initializing vm pre-heap\n");

    // VM setup that has to happen before the kernel heap exists.
    // (Per the original note: mainly page-table and virtual-memory setup.)
    vm_init_preheap();

    // bring up the kernel heap
    lk_primary_cpu_init_level(LK_INIT_LEVEL_VM_PREHEAP, LK_INIT_LEVEL_HEAP - 1);
    dprintf(SPEW, "initializing heap\n");

    // Kernel heap initialization.
    heap_init();

    lk_primary_cpu_init_level(LK_INIT_LEVEL_HEAP, LK_INIT_LEVEL_VM - 1);
    dprintf(SPEW, "initializing vm\n");

    // Virtual memory initialization.
    vm_init();

    // initialize the kernel
    lk_primary_cpu_init_level(LK_INIT_LEVEL_VM, LK_INIT_LEVEL_KERNEL - 1);
    dprintf(SPEW, "initializing kernel\n");

    // Kernel initialization.
    kernel_init();

    lk_primary_cpu_init_level(LK_INIT_LEVEL_KERNEL, LK_INIT_LEVEL_THREADING - 1);

    // create a thread to complete system initialization
    dprintf(SPEW, "creating bootstrap completion thread\n");

    // Create the bootstrap2 thread, which finishes the remaining
    // initialization. It is pinned to CPU 0 via the affinity mask, detached,
    // and resumed.
    // NOTE(review): the original author's note claims scheduling is not yet
    // enabled at this point, so the work still effectively runs in order on
    // this CPU -- confirm against the scheduler code.
    thread_t* t = thread_create("bootstrap2", &bootstrap2, NULL, DEFAULT_PRIORITY);
    thread_set_cpu_affinity(t, cpu_num_to_mask(0));
    thread_detach(t);
    thread_resume(t);

    // become the idle thread and enable interrupts to start the scheduler
    // All work in this function is done; the calling thread becomes the
    // idle thread.
    thread_become_idle();
}

线程早期初始化

thread_init_early():

创建空线程 bootstrap
如果开启了死锁检测机制则初始化该机制
初始化每个 CPU 的线程优先级表

重点在初始化线程优先级表：

sched_init_early():

和 linux 类似的数据结构：

void sched_init_early() {

    // initialize the run queues

    // 每个 CPU 一个表

    for (unsigned int cpu = 0; cpu < SMP_MAX_CPUS; cpu++)

        for (unsigned int i = 0; i < NUM_PRIORITIES; i++) {

            // 每个优先级一个链表

            list_initialize(&percpu[cpu].run_queue[i]);

        }

}

调用全局构造函数：

extern void (*const __init_array_start[])();
extern void (*const __init_array_end[])();

// Invokes, in order, every function pointer stored in the half-open range
// [__init_array_start, __init_array_end).
static void call_constructors() {
    void (*const* ctor)() = __init_array_start;
    while (ctor != __init_array_end) {
        (*ctor)();
        ++ctor;
    }
}

函数上打了 `__attribute__((constructor))` 则为全局构造函数，编译器会把它的函数指针放入 .init_array 段。

而 `__init_array_start` 和 `__init_array_end` 分别是该段的开始和结尾。

CPU 架构早期初始化

arm64_cpu_early_init() 这个函数每个 CPU 内核都会走到，非 prime CPU 通过 arm64_secondary_entry() 调用到这里。

这里主要是跟 CPU 特性相关的初始化：

检查并且记录每个 CPU 的指针
设置每个 CPU 的 EL1 异常向量表
设定系统控制寄存器(SCTLR), 用于控制标准内存和系统设备,并为在硬件内核中实现的功能提供状态信息
检查收集 CPU 支持的特性，并保存待用
配置 CPU 计数器等
配置调试模式
打开快速中断

// Early per-CPU initialization. arm64_secondary_entry() also calls this, so
// secondary CPUs run it too.
static void arm64_cpu_early_init() {
    // Make sure the per cpu pointer is set up.
    arm64_init_percpu_early();

    // Install the EL1 exception vector table.
    // Set the vector base.
    ARM64_WRITE_SYSREG(VBAR_EL1, (uint64_t)&arm64_el1_exception_base);

    // SCTLR is the system control register.
    // https://www.jianshu.com/p/885913b7201c
    // Set some control bits in sctlr.
    uint64_t sctlr = ARM64_READ_SYSREG(sctlr_el1);
    sctlr |= SCTLR_EL1_UCI | SCTLR_EL1_UCT | SCTLR_EL1_DZE | SCTLR_EL1_SA0 | SCTLR_EL1_SA;
    sctlr &= ~SCTLR_EL1_AC;  // Disable alignment checking for EL1, EL0.
    ARM64_WRITE_SYSREG(sctlr_el1, sctlr);

    // Save all of the features of the cpu.
    arm64_feature_init();

    // Enable cycle counter.
    // (Per the original note: reading PMCCNTR_EL0 afterwards gives the number
    // of cycles this CPU has run.)
    ARM64_WRITE_SYSREG(pmcr_el0, PMCR_EL0_ENABLE_BIT | PMCR_EL0_LONG_COUNTER_BIT);
    ARM64_WRITE_SYSREG(pmcntenset_el0, PMCNTENSET_EL0_ENABLE);

    // Enable user space access to cycle counter.
    ARM64_WRITE_SYSREG(pmuserenr_el0, PMUSERENR_EL0_ENABLE);

    // Enable Debug Exceptions by Disabling the OS Lock. The OSLAR_EL1 is a WO
    // register with only the low bit defined as OSLK. Write 0 to disable.
    ARM64_WRITE_SYSREG(oslar_el1, 0x0);

    // Enable user space access to virtual counter (CNTVCT_EL0).
    ARM64_WRITE_SYSREG(cntkctl_el1, CNTKCTL_EL1_ENABLE_VIRTUAL_COUNTER);

    // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.100048_0002_05_en/jfa1406793259505.html
    // Program the monitor debug system control register with its initial
    // value. (Per the original note: enables local kernel debugging, i.e.
    // breakpoint / watchpoint / vector-catch debug events.)
    ARM64_WRITE_SYSREG(mdscr_el1, MSDCR_EL1_INITIAL_VALUE);

    // Enable FIQs (fast interrupts).
    arch_enable_fiqs();
}

CPU 特性检查

arm64_feature_init():

检查以下特性：

Cache 信息
支持的加密算法
数据高速缓存的支持
浮点计算
SIMD，即单指令多数据集指令，例如向量运算
address space ID

// call on every cpu to save features

void arm64_feature_init() {

    // set up some global constants based on the boot cpu

    cpu_num_t cpu = arch_curr_cpu_num();

    if (cpu == 0) {

        // read the block size of DC ZVA

        uint64_t dczid = ARM64_READ_SYSREG(dczid_el0);

        uint32_t arm64_zva_shift = 0;

        if (BIT(dczid, 4) == 0) {

            arm64_zva_shift = (uint32_t)(ARM64_READ_SYSREG(dczid_el0) & 0xf) + 2;

        }

        ASSERT(arm64_zva_shift != 0); // for now, fail if DC ZVA is unavailable

        arm64_zva_size = (1u << arm64_zva_shift);

 

        // read the dcache and icache line size

        uint64_t ctr = ARM64_READ_SYSREG(ctr_el0);

        uint32_t arm64_dcache_shift = (uint32_t)BITS_SHIFT(ctr, 19, 16) + 2;

        arm64_dcache_size = (1u << arm64_dcache_shift);

        uint32_t arm64_icache_shift = (uint32_t)BITS(ctr, 3, 0) + 2;

        arm64_icache_size = (1u << arm64_icache_shift);

 

        // parse the ISA feature bits

        // 收集 CPU 支持的加密算法

        // 每个内核都要跑一次

        // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.100048_0002_05_en/jfa1406793234300.html

        arm64_features |= ZX_HAS_CPU_FEATURES;

        uint64_t isar0 = ARM64_READ_SYSREG(id_aa64isar0_el1);

        if (BITS_SHIFT(isar0, 7, 4) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_AES;

        }

        if (BITS_SHIFT(isar0, 7, 4) >= 2) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_PMULL;

        }

        if (BITS_SHIFT(isar0, 11, 8) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_SHA1;

        }

        if (BITS_SHIFT(isar0, 15, 12) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_SHA2;

        }

        if (BITS_SHIFT(isar0, 19, 16) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_CRC32;

        }

        if (BITS_SHIFT(isar0, 23, 20) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_ATOMICS;

        }

        if (BITS_SHIFT(isar0, 31, 28) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_RDM;

        }

        if (BITS_SHIFT(isar0, 35, 32) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_SHA3;

        }

        if (BITS_SHIFT(isar0, 39, 36) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_SM3;

        }

        if (BITS_SHIFT(isar0, 43, 40) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_SM4;

        }

        if (BITS_SHIFT(isar0, 47, 44) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_DP;

        }

 

        // 是否支持数据高速缓存

        // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.100048_0002_05_en/jfa1406793234300.html

        uint64_t isar1 = ARM64_READ_SYSREG(id_aa64isar1_el1);

        if (BITS_SHIFT(isar1, 3, 0) >= 1) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_DPB;

        }

 

        // 是否支持浮点计算

        // http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.100048_0002_05_en/jfa1406793234300.html

        uint64_t pfr0 = ARM64_READ_SYSREG(id_aa64pfr0_el1);

        if (BITS_SHIFT(pfr0, 19, 16) < 0b1111) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_FP;

        }

 

        // 是否支持 SIMD，即单指令多数据集指令，例如向量运算

        if (BITS_SHIFT(pfr0, 23, 20) < 0b1111) {

            arm64_features |= ZX_ARM64_FEATURE_ISA_ASIMD;

        }

    }

 

    // read the cache info for each cpu

    arm64_get_cache_info(&(cache_info[cpu]));

 

    // check to make sure implementation supports 16 bit asids

    // 是否支持 address space ID

    uint64_t mmfr0 = ARM64_READ_SYSREG(ID_AA64MMFR0_EL1);

    ASSERT((mmfr0 & ARM64_MMFR0_ASIDBITS_MASK) == ARM64_MMFR0_ASIDBITS_16);

}

平台早期初始化

主要处理和物理内存相关的以及 ZBI(Zircon 启动镜像)。

处理 ZBI (Zircon Boot Image)
初始化各个内核设备驱动
把内核镜像以及 Ramdisk 镜像所在的内存区域加入 PMM(物理内存管理器) 的保留内存区域列表
配置内存限制

// Early platform initialization (ARM64).
//
// Locates the ZBI through zbi_paddr, records it as the ramdisk, processes its
// items, brings up the kernel drivers, reads a few command-line options,
// reserves the ramdisk's physical range and registers memory arenas.
// Panics if the ZBI address was never set, if the data there is not a ZBI
// container, or if the ramdisk base/size come out empty.
void platform_early_init(void) {
    // if the zbi_paddr variable is -1, it was not set
    // in start.S, so we are in a bad place.

    // zbi_paddr is the physical address of the boot image.
    if (zbi_paddr == -1UL) {
        panic("no zbi_paddr!\n");
    }

    // Translate it to a virtual address through the physmap.
    void* zbi_vaddr = paddr_to_physmap(zbi_paddr);

    // initialize the boot memory reservation system
    // (Per the original note: this adds the kernel image's memory to the
    // PMM reserved-range list.)
    boot_reserve_init();

    // Record the ZBI container as the ramdisk. Its size is the payload length
    // plus the container header, rounded up to a whole page.
    if (zbi_vaddr && is_zbi_container(zbi_vaddr)) {
        zbi_header_t* header = (zbi_header_t*)zbi_vaddr;

        ramdisk_base = header;
        ramdisk_size = ROUNDUP(header->length + sizeof(*header), PAGE_SIZE);
    } else {
        panic("no bootdata!\n");
    }

    if (!ramdisk_base || !ramdisk_size) {
        panic("no ramdisk!\n");
    }

    zbi_header_t* zbi = reinterpret_cast<zbi_header_t*>(ramdisk_base);

    // walk the zbi structure and process all the items
    // (Per the original note: the kernel image, kernel drivers, ramdisk, etc.
    // are each represented as ZBI items.)
    process_zbi(zbi);

    // is the cmdline option to bypass dlog set ?
    dlog_bypass_init();

    // bring up kernel drivers after we have mapped our peripheral ranges
    pdev_init(zbi);

    // Serial port should be active now

    // Read cmdline after processing zbi, which may contain cmdline data.
    halt_on_panic = cmdline_get_bool("kernel.halt-on-panic", false);

    // Check if serial should be enabled
    // The UART is marked disabled only when kernel.serial is exactly "none".
    const char* serial_mode = cmdline_get("kernel.serial");
    uart_disabled = (serial_mode != NULL && !strcmp(serial_mode, "none"));

    // add the ramdisk to the boot reserve memory list
    // The log line prints the range inclusively, hence the "- 1".
    paddr_t ramdisk_start_phys = physmap_to_paddr(ramdisk_base);
    paddr_t ramdisk_end_phys = ramdisk_start_phys + ramdisk_size;
    dprintf(INFO, "reserving ramdisk phys range [%#" PRIx64 ", %#" PRIx64 "]\n",
            ramdisk_start_phys, ramdisk_end_phys - 1);
    boot_reserve_add_range(ramdisk_start_phys, ramdisk_size);

    // check if a memory limit was passed in via kernel.memory-limit-mb and
    // find memory ranges to use if one is found.
    zx_status_t status = memory_limit_init();
    if (status == ZX_OK) {
        // Figure out and add arenas based on the memory limit and our range of DRAM
        memory_limit_add_range(mem_arena.base, mem_arena.size, mem_arena);
        status = memory_limit_add_arenas(mem_arena);
    }

    // If no memory limit was found, or adding arenas from the range failed, then add
    // the existing global arena.
    if (status != ZX_OK) {
        dprintf(INFO, "memory limit lib returned an error (%d), falling back to default arena\n",
                status);
        pmm_add_arena(&mem_arena);
    }

    // tell the boot allocator to mark ranges we've reserved as off limits
    boot_reserve_wire();
}

首先预先简单了解两个概念，这个具体会在后续文章中分析：

ZBI

ZBI(Zircon Boot Image),Zircon 启动镜像，内核层在编译时最终出来的文件除了 Linux 传统的 boot.img, ramdisk.img 等，在 Zircon 中，还有很多种 bin 输出文件，他们在启动时被 bootloader 和 kernel.bin 一起加载到内存中，这些 bin 就叫做 ZBI。

ZBI 可以是：

内核镜像
Ramdisk
驱动
配置

......

如下宏：

// X-macro listing the ZBI item types. For every entry it invokes `macro`
// with three arguments: the type constant, a name string and a file
// extension string.
#define ZBI_ALL_TYPES(macro) \
    macro(ZBI_TYPE_CONTAINER, "CONTAINER", ".bin") \
    macro(ZBI_TYPE_KERNEL_X64, "KERNEL_X64", ".bin") \
    macro(ZBI_TYPE_KERNEL_ARM64, "KERNEL_ARM64", ".bin") \
    macro(ZBI_TYPE_DISCARD, "DISCARD", ".bin") \
    macro(ZBI_TYPE_STORAGE_RAMDISK, "RAMDISK", ".bin") \
    macro(ZBI_TYPE_STORAGE_BOOTFS, "BOOTFS", ".bin") \
    macro(ZBI_TYPE_CMDLINE, "CMDLINE", ".txt") \
    macro(ZBI_TYPE_CRASHLOG, "CRASHLOG", ".bin") \
    macro(ZBI_TYPE_NVRAM, "NVRAM", ".bin") \
    macro(ZBI_TYPE_PLATFORM_ID, "PLATFORM_ID", ".bin") \
    macro(ZBI_TYPE_CPU_CONFIG, "CPU_CONFIG", ".bin") \
    macro(ZBI_TYPE_MEM_CONFIG, "MEM_CONFIG", ".bin") \
    macro(ZBI_TYPE_KERNEL_DRIVER, "KERNEL_DRIVER", ".bin") \
    macro(ZBI_TYPE_ACPI_RSDP, "ACPI_RSDP", ".bin") \
    macro(ZBI_TYPE_SMBIOS, "SMBIOS", ".bin") \
    macro(ZBI_TYPE_EFI_MEMORY_MAP, "EFI_MEMORY_MAP", ".bin") \
    macro(ZBI_TYPE_EFI_SYSTEM_TABLE, "EFI_SYSTEM_TABLE", ".bin") \
    macro(ZBI_TYPE_E820_TABLE, "E820_TABLE", ".bin") \
    macro(ZBI_TYPE_DEBUG_UART, "DEBUG_UART", ".bin") \
    macro(ZBI_TYPE_FRAMEBUFFER, "FRAMEBUFFER", ".bin") \
    macro(ZBI_TYPE_DRV_MAC_ADDRESS, "DRV_MAC_ADDRESS", ".bin") \
    macro(ZBI_TYPE_DRV_PARTITION_MAP, "DRV_PARTITION_MAP", ".bin") \
    macro(ZBI_TYPE_BOOT_CONFIG, "BOOT_CONFIG", ".bin") \
    macro(ZBI_TYPE_BOOT_VERSION, "BOOT_VERSION", ".bin")

ramdisk 是根 ZBI

PMM

PMM(Physical Memory Manager)，物理内存管理器，这是 Zircon 用来统一管理物理内存的机制。

现代计算机为了拓展规模，在无法在单片 die 中塞入更多核心的情况下发展出了 NUMA 架构，简单的来说就是使用多路 CPU，而内存是挂在每块 CPU 的内存控制器上的，也就是说，每块 CPU 都有自己管理的内存，这样内存就被分割成了一片片区域。

PMM 使用 PmmNode 数据结构表示每片内存。每个 CPU 内存控制器下的内存不可能都是插满的，这样的话就必然会有内存空间的断续，PMM 将一块连续的内存区域抽象成 PmmArena，这样：

PMM 下管理着多个 PmmNode，每个 PmmNode 下又管理着多个 PmmArena。

内核驱动初始化

pdev_init() -> pdev_run_hooks(LK_INIT_LEVEL_PLATFORM_EARLY) -> 遍历 Driver ZBI -> lk_pdev_init_struct.hook()

驱动的注册，以及数据结构：

typedef void (*lk_pdev_init_hook)(const void* driver_data, uint32_t length);

 

// for registering platform drivers

// 内核驱动结构

struct lk_pdev_init_struct {

    uint32_t type;          // driver type, as defined in <zircon/boot/kernel-drivers.h>

    lk_pdev_init_hook hook; // hook for driver init

    uint level;             // init level for the hook

    const char* name;

};

 

// 驱动注册宏

typedef void (*lk_pdev_init_hook)(const void* driver_data, uint32_t length);

 

// for registering platform drivers

// 内核驱动结构

struct lk_pdev_init_struct {

    uint32_t type;          // driver type, as defined in <zircon/boot/kernel-drivers.h>

    lk_pdev_init_hook hook; // hook for driver init

    uint level;             // init level for the hook

    const char* name;

};

 

// 驱动注册宏

#define LK_PDEV_INIT(_name, _type, _hook, _level)                                                                     

    __ALIGNED(sizeof(void*))                                                                                           \

    __USED __SECTION(".data.rel.ro.lk_pdev_init") static const struct lk_pdev_init_struct _dev_init_struct_##_name = { \

        .type = _type,                                                                                                 \

        .hook = _hook,                                                                                                 \

        .level = _level,                                                                                               \

        .name = #_name,                                                                                                \

    };

调用驱动初始化函数：

static void pdev_init_driver(uint32_t type, const void* driver_data, uint32_t length, uint level) {

    const struct lk_pdev_init_struct* ptr;

    for (ptr = __start_lk_pdev_init; ptr != __stop_lk_pdev_init; ptr++) {

        if (ptr->type == type && ptr->level == level) {

            ptr->hook(driver_data, length);

            return;

        }

    }

}

虚拟内存预初始化

在初始化内核堆之前，需要先初始化内核内存空间页表和虚拟内存，因为内核堆所管理的这片内存包含在此：

构造内核空间虚拟内存的抽象(VmAspace)
在页表中标记内核空间的内存已用
分配零页

VmAspace 是 Zircon 对虚拟内存的抽象，较为复杂，另做讨论。

// VM initialization that must run before the kernel heap is available:
// sets up the kernel address space object, marks the boot allocator's
// physical range as in use, and allocates and zeroes the zero page.
void vm_init_preheap() {
    // allow the vmm a shot at initializing some of its data structures
    // (constructs the VmAspace object that represents kernel space)
    VmAspace::KernelAspaceInitPreHeap();

    // mark the physical pages used by the boot time allocator
    // Skipped when the boot allocator range is empty.
    if (boot_alloc_end != boot_alloc_start) {
        dprintf(INFO, "VM: marking boot alloc used range [%#" PRIxPTR ", %#" PRIxPTR ")\n", boot_alloc_start,
                boot_alloc_end);

        MarkPagesInUsePhys(boot_alloc_start, boot_alloc_end - boot_alloc_start);
    }

    zx_status_t status;

// KASLR-related random padding. The body of this section was elided by the
// article's author and is not shown here.
#if !DISABLE_KASLR // Disable random memory padding for KASLR
    。。。。。
#endif
    // grab a page and mark it as the zero page
    status = pmm_alloc_page(0, &zero_page, &zero_page_paddr);
    DEBUG_ASSERT(status == ZX_OK);
    void* ptr = paddr_to_physmap(zero_page_paddr);
    DEBUG_ASSERT(ptr);
    arch_zero_page(ptr);
}

 

// Builds the kernel address space from function-local statics (no heap is
// used here), initializes it, and publishes it through
// VmAspace::kernel_aspace_ and the global `aspaces` list.
void VmAspace::KernelAspaceInitPreHeap() TA_NO_THREAD_SAFETY_ANALYSIS {
    // the singleton kernel address space
    // It is a function-local static, so there is exactly one instance.
    // VmAspace stands for Virtual Memory Address Space.
    static VmAspace _kernel_aspace(KERNEL_ASPACE_BASE, KERNEL_ASPACE_SIZE, VmAspace::TYPE_KERNEL, "kernel");

    // the singleton dummy root vmar (used to break a reference cycle in
    // Destroy())
    static VmAddressRegionDummy dummy_vmar;
#if LK_DEBUGLEVEL > 1
    _kernel_aspace.Adopt();
    dummy_vmar.Adopt();
#endif

    dummy_root_vmar = &dummy_vmar;

    // The kernel's root VMAR is also a static; the aspace takes a reference
    // to it.
    static VmAddressRegion _kernel_root_vmar(_kernel_aspace);

    _kernel_aspace.root_vmar_ = fbl::AdoptRef(&_kernel_root_vmar);

    // Initialize the address space; a negative result is fatal.
    auto err = _kernel_aspace.Init();
    ASSERT(err >= 0);

    // save a pointer to the singleton kernel address space
    VmAspace::kernel_aspace_ = &_kernel_aspace;
    aspaces.push_front(kernel_aspace_);
}

内核堆初始化

Zircon 的内核堆由内部的 cmpctmalloc 实现。

具体实现还没有细看：

// cmpct 堆初始化

void cmpct_init(void) {

    LTRACE_ENTRY;

 

    // 初始化全局互斥锁

    // Create a mutex.

    mutex_init(&theheap.lock);

 

    // 初始化空闲列表

    // Initialize the free list.

    for (int i = 0; i < NUMBER_OF_BUCKETS; i++) {

        theheap.free_lists[i] = NULL;

    }

    for (int i = 0; i < BUCKET_WORDS; i++) {

        theheap.free_list_bits[i] = 0;

    }

 

    size_t initial_alloc = HEAP_GROW_SIZE - 2 * sizeof(header_t);

 

    theheap.remaining = 0;

 

    heap_grow(initial_alloc);

}

虚拟内存初始化

在vmm结构中标记内核已使用的虚拟地址。
根据内核使用的地址的区域，分别设置内存的保护。

// Reserves and protects the kernel image's regions in the kernel address
// space, then reserves the physmap range.
void vm_init() {
    LTRACE_ENTRY;

    VmAspace* aspace = VmAspace::kernel_aspace();

    // The sections of the kernel image and the MMU permissions for each:
    // code is read+execute, rodata is read-only, data and bss are read+write.
    // we expect the kernel to be in a temporary mapping, define permanent
    // regions for those now
    struct temp_region {
        const char* name;
        vaddr_t base;
        size_t size;
        uint arch_mmu_flags;
    } regions[] = {
        {
            .name = "kernel_code",
            .base = (vaddr_t)__code_start,
            .size = ROUNDUP((uintptr_t)__code_end - (uintptr_t)__code_start, PAGE_SIZE),
            .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_EXECUTE,
        },
        {
            .name = "kernel_rodata",
            .base = (vaddr_t)__rodata_start,
            .size = ROUNDUP((uintptr_t)__rodata_end - (uintptr_t)__rodata_start, PAGE_SIZE),
            .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ,
        },
        {
            .name = "kernel_data",
            .base = (vaddr_t)__data_start,
            .size = ROUNDUP((uintptr_t)__data_end - (uintptr_t)__data_start, PAGE_SIZE),
            .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE,
        },
        {
            .name = "kernel_bss",
            .base = (vaddr_t)__bss_start,
            .size = ROUNDUP((uintptr_t)_end - (uintptr_t)__bss_start, PAGE_SIZE),
            .arch_mmu_flags = ARCH_MMU_FLAG_PERM_READ | ARCH_MMU_FLAG_PERM_WRITE,
        },
    };

    // Walk the regions above; each one is reserved and then protected.
    for (uint i = 0; i < fbl::count_of(regions); ++i) {
        temp_region* region = &regions[i];
        ASSERT(IS_PAGE_ALIGNED(region->base));

        dprintf(INFO, "VM: reserving kernel region [%#" PRIxPTR ", %#" PRIxPTR ") flags %#x name '%s'\n",
                region->base, region->base + region->size, region->arch_mmu_flags, region->name);

        // Reserve the region's virtual range in the kernel address space.
        zx_status_t status = aspace->ReserveSpace(region->name, region->size, region->base);
        ASSERT(status == ZX_OK);
        // Apply the region's MMU permission flags.
        status = ProtectRegion(aspace, region->base, region->arch_mmu_flags);
        ASSERT(status == ZX_OK);
    }

    // reserve the kernel aspace where the physmap is
    // NOTE(review): unlike the reservations above, this return value is not
    // checked.
    aspace->ReserveSpace("physmap", PHYSMAP_SIZE, PHYSMAP_BASE);

// KASLR-related random padding. The body of this section was elided by the
// article's author and is not shown here.
#if !DISABLE_KASLR // Disable random memory padding for KASLR
    。。。。。。
#endif
}

ReserveSpace：在 vmm 中标记一块虚拟内存，这块虚拟内存抽象为VmRegion类，拥有自己的底层mmu相关的配置。
ProtectRegion：对某VmRegion对应的虚拟内存设置内存保护的相关参数。

内核初始化

这部分逻辑很简单：

初始化多核心 MP
初始化计时器队列

// Kernel-level initialization: multiprocessor state first, then the timer
// queues, with a SPEW log line before each step.
void kernel_init(void) {
    dprintf(SPEW, "initializing mp\n");

    // Multi-core (MP) initialization.
    mp_init();

    dprintf(SPEW, "initializing timers\n");

    // Timer queue initialization.
    timer_queue_init();
}

 

// 多核初始化

void mp_init(void) {

    // CPU 热插拔锁

    mutex_init(&mp.hotplug_lock);

    // 核间中断任务表初始化

    mp.ipi_task_lock = SPIN_LOCK_INITIAL_VALUE;

    for (uint i = 0; i < fbl::count_of(mp.ipi_task_list); ++i) {

        list_initialize(&mp.ipi_task_list[i]);

    }

}

 

// 初始化每个 CPU 的计时器队列

void timer_queue_init(void) {

    for (uint i = 0; i < SMP_MAX_CPUS; i++) {

        list_initialize(&percpu[i].timer_queue);

        percpu[i].preempt_timer_deadline = ZX_TIME_INFINITE;

        percpu[i].next_timer_deadline = ZX_TIME_INFINITE;

    }

}

bootstrap2 线程完成后期初始化

这里要注意，虽说是在 bootstrap2 线程中跑，但此时没有开启任务调度，所以实际还是在同一个 CPU 执行的，因此下面代码是顺序执行的。

// Entry function of the bootstrap2 thread created by lk_main().
// Runs the later init stages (arch, platform, target) in order, advancing
// the primary CPU's init level between them. Always returns 0.
static int bootstrap2(void*) {
    dprintf(SPEW, "top of bootstrap2()\n");

    lk_primary_cpu_init_level(LK_INIT_LEVEL_THREADING, LK_INIT_LEVEL_ARCH - 1);

    // CPU-architecture initialization.
    arch_init();

    // initialize the rest of the platform
    dprintf(SPEW, "initializing platform\n");
    lk_primary_cpu_init_level(LK_INIT_LEVEL_ARCH, LK_INIT_LEVEL_PLATFORM - 1);

    // Platform initialization.
    platform_init();

    // initialize the target
    dprintf(SPEW, "initializing target\n");
    lk_primary_cpu_init_level(LK_INIT_LEVEL_PLATFORM, LK_INIT_LEVEL_TARGET - 1);

    // Target-device initialization.
    // NOTE(review): the original author's note says this hook is not
    // implemented -- confirm against the target code.
    target_init();

    dprintf(SPEW, "moving to last init level\n");
    lk_primary_cpu_init_level(LK_INIT_LEVEL_TARGET, LK_INIT_LEVEL_LAST);

    return 0;
}

CPU 架构初始化

初始化每个 CPU 的中断；
为每个非 prime CPU 创建 IDLE 线程；
释放启动锁，非 prime cpu 即将欢快地跑起来。

// Architecture initialization, run from the bootstrap2 thread.
// Sets up per-CPU MP state, works out how many secondary CPUs to bring up,
// prepares them via lk_init_secondary_cpus(), then releases the boot lock
// that the secondaries take in arm64_secondary_entry().
void arch_init() TA_NO_THREAD_SAFETY_ANALYSIS {

    // (Per the original note: mainly per-CPU interrupt setup.)
    arch_mp_init_percpu();

    dprintf(INFO, "ARM boot EL%lu\n", arm64_get_boot_el());

    arm64_feature_debug(true);

    // Read the CPU count from the kernel.smp.maxcpus option. It defaults to
    // the architecture maximum, and falls back to that maximum when the
    // option is zero or exceeds it.
    uint32_t max_cpus = arch_max_num_cpus();
    uint32_t cmdline_max_cpus = cmdline_get_uint32("kernel.smp.maxcpus", max_cpus);
    if (cmdline_max_cpus > max_cpus || cmdline_max_cpus <= 0) {
        printf("invalid kernel.smp.maxcpus value, defaulting to %u\n", max_cpus);
        cmdline_max_cpus = max_cpus;
    }

    // One of the CPUs is the one running this code.
    secondaries_to_init = cmdline_max_cpus - 1;

    // Prepare the secondary CPUs; lk_init_secondary_cpus() creates an idle
    // thread for each of them.
    lk_init_secondary_cpus(secondaries_to_init);

    LTRACEF("releasing %d secondary cpus\n", secondaries_to_init);

    // Release the secondary cpus.
    // arm64_secondary_entry() takes and releases this same lock, so the
    // secondaries can proceed once it is unlocked here.
    spin_unlock(&arm_boot_cpu_lock);

    // Flush the release of the lock, since the secondary cpus are running without cache on.
    // Must come after the unlock so the released state is what reaches memory.
    arch_clean_cache_range((addr_t)&arm_boot_cpu_lock, sizeof(arm_boot_cpu_lock));
}

 

void lk_init_secondary_cpus(uint secondary_cpu_count) {

    if (secondary_cpu_count >= SMP_MAX_CPUS) {

        dprintf(CRITICAL, "Invalid secondary_cpu_count %u, SMP_MAX_CPUS %d\n",

                secondary_cpu_count, SMP_MAX_CPUS);

        secondary_cpu_count = SMP_MAX_CPUS - 1;

    }

 

    // 为每个 CPU 创建 IDLE 线程

    for (uint i = 0; i < secondary_cpu_count; i++) {

        thread_t* t = thread_create_idle_thread(i + 1);

        if (!t) {

            dprintf(CRITICAL, "could not allocate idle thread %u\n", i + 1);

            secondary_idle_thread_count = i;

            break;

        }

    }

    secondary_idle_thread_count = secondary_cpu_count;

}

平台初始化

主要和 CPU 热插拔相关

platform_init() -> platform_cpu_init()

// 初始化平台上的其他 CPU，CPU 热插拔？

static void platform_cpu_init(void) {

    // 遍历所有簇

    for (uint cluster = 0; cluster < cpu_cluster_count; cluster++) {

        // 遍历簇内所有 CPU 内核

        for (uint cpu = 0; cpu < cpu_cluster_cpus[cluster]; cpu++) {

 

            // 启动新增 CPU

            if (cluster != 0 || cpu != 0) {

                // create a stack for the cpu we're about to start

                zx_status_t status = arm64_create_secondary_stack(cluster, cpu);

                DEBUG_ASSERT(status == ZX_OK);

 

                // start the cpu

                status = platform_start_cpu(cluster, cpu);

 

                if (status != ZX_OK) {

                    // TODO(maniscalco): Is continuing really the right thing to do here?

 

                    // start failed, free the stack

                    zx_status_t status = arm64_free_secondary_stack(cluster, cpu);

                    DEBUG_ASSERT(status == ZX_OK);

                    continue;

                }

 

                // the cpu booted

                //

                // bootstrap thread is now responsible for freeing its stack

            }

        }

    }

}

arm64_secondary_entry

非 prime cpu 初始化代码

大部分前面都已经介绍过：

// called from assembly.
//
// C entry point for secondary CPUs: early per-CPU setup, a wait on the boot
// lock, then thread and MP setup before entering lk_secondary_cpu_entry().
extern "C" void arm64_secondary_entry() {
    arm64_cpu_early_init();

    // Take and immediately release the boot lock. arch_init() unlocks
    // arm_boot_cpu_lock to release the secondaries, so this pair acts as a
    // wait for that release.
    spin_lock(&arm_boot_cpu_lock);
    spin_unlock(&arm_boot_cpu_lock);

    // _init_thread is indexed from the first secondary CPU, hence cpu - 1.
    uint cpu = arch_curr_cpu_num();
    thread_secondary_cpu_init_early(&_init_thread[cpu - 1]);
    // Run early secondary cpu init routines up to the threading level.
    lk_init_level(LK_INIT_FLAG_SECONDARY_CPUS, LK_INIT_LEVEL_EARLIEST, LK_INIT_LEVEL_THREADING - 1);

    arch_mp_init_percpu();

    arm64_feature_debug(false);

    lk_secondary_cpu_entry();
}

thread_secondary_cpu_init_early 为此 CPU 创建了一个占位的空线程。

lk_secondary_cpu_entry -> thread_secondary_cpu_entry：

标记此 CPU 启动完成，可以参与调度；
为此 CPU 初始化 DPC。

- End -

看雪ID：坑大

https://bbs.pediy.com/user-675677.htm

本文由看雪论坛坑大原创

转载请注明来自看雪社区

热门图书推荐：

戳立即购买！

征题正在火热进行中！

（晋级赛Q1即将于3月10日开启，敬请期待！）

热门文章阅读

1、Zircon - Fuchsia 内核分析 - 启动（平台初始化）

2、微信PC端技术研究：保存聊天语音

3、萌新对C++编写的动态库逆向分析

4、一个PESpin保护程序脱壳调试报告

热门课程推荐

公众号ID：ikanxue

官方微博：看雪安全

商务合作：wsc@kanxue.com

↙点击下方“阅读原文”

二湘：朱令去世一周年，清华学子控诉清华在朱令案中的冷血和无耻

李宜雪的良知卖了2万元，真正需要声援的是罗灿宏啊

中国紫砂博物馆出品！2025紫砂五福杯：五福云集，掌上藏品，宜藏宜礼！

故意按摩让女生“产生欲望”后发生关系，算性侵吗？

洗牌电商圈！阿哲放话全网：挑战抖音所有机制！爆全品类大牌！